[19]:
# Attempt with the week 7 generated example data
# Generate time-series data
import pandas as pd
import matplotlib.pyplot as plt
data = {
'Date': pd.date_range(start='2024-01-01', periods=10, freq='D'),
'Sales': [200, 250, 270, 300, 400, 410, 380, 350, 370, 420]
}
df = pd.DataFrame(data)
# Plotting the sales
plt.figure(figsize=(10, 5))
plt.plot(df['Date'], df['Sales'], label='Sales')
plt.title('Sales Over Time')
plt.xlabel('Date')
plt.ylabel('Sale')
plt.legend()
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()
[9]:
# Day 1: Visualizing Time-Series Data
# Task 1: Plotting Stock Prices over Time
# Generate time-series data
# Sample dataset (replace with actual data)
import pandas as pd
import matplotlib.pyplot as plt
# Generate time-series data
data = {
'Date': pd.date_range(start='2023-01-01', periods=365, freq='D'),
'Stock Price': [100 + x + (x % 30) for x in range(365)]
}
df = pd.DataFrame(data)
# Simulate missing data
df.loc[100:120, 'Stock Price'] = None # 'Stock Price' column had missing values
# Fill missing values by forward fill
df['Stock Price'].ffill(inplace=True)
# Plotting the stock prices
plt.figure(figsize=(10, 6))
plt.plot(df['Date'], df['Stock Price'], label='Stock Price')
plt.title('Stock Prices Over Time')
plt.xlabel('Date')
plt.ylabel('Price')
plt.legend()
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()
[20]:
print(df.columns)
Index(['Date', 'Sales'], dtype='object')
[25]:
# Task 2: Visualizing Seasonality and Trends
# Solution 1: Use period=30 for Monthly Seasonality (One Year of Data)
import pandas as pd
import matplotlib.pyplot as plt
from statsmodels.tsa.seasonal import seasonal_decompose
# Generate time-series data
data = {
'Date': pd.date_range(start='2023-01-01', periods=365, freq='D'),
'Stock Price': [100 + x + (x % 30) for x in range(365)]
}
df = pd.DataFrame(data)
# Ensure the 'Stock Price' column is numeric
df['Stock Price'] = pd.to_numeric(df['Stock Price'], errors='coerce')
# Perform seasonal decomposition with period=30 (monthly seasonality)
result = seasonal_decompose(df['Stock Price'], model='multiplicative', period=30)
# Plot the decomposition
result.plot()
plt.show()
[26]:
# Task 2: Visualizing Seasonality and Trends
# Solution 2: Extend the Data to 2 Years (730 Observations) for Yearly Seasonality
# Generate two years of time-series data (730 observations)
data = {
'Date': pd.date_range(start='2023-01-01', periods=730, freq='D'), # Two years of daily data
'Stock Price': [100 + x + (x % 30) for x in range(730)] # Extended to two years
}
df = pd.DataFrame(data)
# Ensure the 'Stock Price' column is numeric
df['Stock Price'] = pd.to_numeric(df['Stock Price'], errors='coerce')
# Perform seasonal decomposition with period=365 (two years of data)
result = seasonal_decompose(df['Stock Price'], model='multiplicative', period=365)
# Plot the decomposition
result.plot()
plt.show()
[27]:
# Task 3: Visualizing Daily Temperature Trends
from statsmodels.tsa.seasonal import seasonal_decompose
# Assuming df['Stock Price'] is already in the correct format
result = seasonal_decompose(df['Stock Price'], model='multiplicative', period=365)
# Plot the decomposition
result.plot()
plt.show()
# Explanation of components:
# Trend: The underlying trend over time.
# Seasonality: Patterns that repeat at regular intervals (e.g., yearly).
# Residual: What's left after removing trend and seasonality.
[29]:
# Task 4: Handling Irregular Time Intervals
# Sample dataset with irregular intervals
df_irregular = pd.DataFrame({
'Date': ['2023-01-01', '2023-01-03', '2023-01-05', '2023-01-07'],
'Stock Price': [100, 105, 110, 115]
})
df_irregular['Date'] = pd.to_datetime(df_irregular['Date'])
df_irregular.set_index('Date', inplace=True)
# Interpolate missing dates
df_irregular_resampled = df_irregular.resample('D').interpolate(method='linear')
# Plotting the result
plt.figure(figsize=(8, 6))
plt.plot(df_irregular_resampled.index, df_irregular_resampled['Stock Price'], label='Interpolated Stock Prices')
plt.title('Stock Prices with Interpolated Missing Data')
plt.xlabel('Date')
plt.ylabel('Price')
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()
[3]:
!pip install folium
Defaulting to user installation because normal site-packages is not writeable Looking in links: /usr/share/pip-wheels Collecting folium Downloading folium-0.18.0-py2.py3-none-any.whl.metadata (3.8 kB) Collecting branca>=0.6.0 (from folium) Downloading branca-0.8.0-py3-none-any.whl.metadata (1.5 kB) Requirement already satisfied: jinja2>=2.9 in /opt/conda/envs/anaconda-2024.02-py310/lib/python3.10/site-packages (from folium) (3.1.3) Requirement already satisfied: numpy in /opt/conda/envs/anaconda-2024.02-py310/lib/python3.10/site-packages (from folium) (1.26.4) Requirement already satisfied: requests in /opt/conda/envs/anaconda-2024.02-py310/lib/python3.10/site-packages (from folium) (2.31.0) Requirement already satisfied: xyzservices in /opt/conda/envs/anaconda-2024.02-py310/lib/python3.10/site-packages (from folium) (2022.9.0) Requirement already satisfied: MarkupSafe>=2.0 in /opt/conda/envs/anaconda-2024.02-py310/lib/python3.10/site-packages (from jinja2>=2.9->folium) (2.1.3) Requirement already satisfied: charset-normalizer<4,>=2 in /opt/conda/envs/anaconda-2024.02-py310/lib/python3.10/site-packages (from requests->folium) (2.0.4) Requirement already satisfied: idna<4,>=2.5 in /opt/conda/envs/anaconda-2024.02-py310/lib/python3.10/site-packages (from requests->folium) (3.4) Requirement already satisfied: urllib3<3,>=1.21.1 in /opt/conda/envs/anaconda-2024.02-py310/lib/python3.10/site-packages (from requests->folium) (2.0.7) Requirement already satisfied: certifi>=2017.4.17 in /opt/conda/envs/anaconda-2024.02-py310/lib/python3.10/site-packages (from requests->folium) (2024.2.2) Downloading folium-0.18.0-py2.py3-none-any.whl (108 kB) ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 108.9/108.9 kB 2.0 MB/s eta 0:00:00.1 MB/s eta 0:00:01 Downloading branca-0.8.0-py3-none-any.whl (25 kB) Installing collected packages: branca, folium Successfully installed branca-0.8.0 folium-0.18.0
[4]:
!pip show folium
Name: folium Version: 0.18.0 Summary: Make beautiful maps with Leaflet.js & Python Home-page: https://github.com/python-visualization/folium Author: Rob Story Author-email: wrobstory@gmail.com License: MIT Location: /home/6f348dcd-6f88-48fa-ab3c-868ada9b6b8b/.local/lib/python3.10/site-packages Requires: branca, jinja2, numpy, requests, xyzservices Required-by:
[6]:
import sys
print(sys.executable)
/opt/conda/envs/anaconda-2024.02-py310/bin/python
[7]:
!which python
/opt/conda/envs/anaconda-2024.02-py310/bin/python
[8]:
!pip install --upgrade --user folium
Looking in links: /usr/share/pip-wheels Requirement already satisfied: folium in ./.local/lib/python3.10/site-packages (0.18.0) Requirement already satisfied: branca>=0.6.0 in ./.local/lib/python3.10/site-packages (from folium) (0.8.0) Requirement already satisfied: jinja2>=2.9 in /opt/conda/envs/anaconda-2024.02-py310/lib/python3.10/site-packages (from folium) (3.1.3) Requirement already satisfied: numpy in /opt/conda/envs/anaconda-2024.02-py310/lib/python3.10/site-packages (from folium) (1.26.4) Requirement already satisfied: requests in /opt/conda/envs/anaconda-2024.02-py310/lib/python3.10/site-packages (from folium) (2.31.0) Requirement already satisfied: xyzservices in /opt/conda/envs/anaconda-2024.02-py310/lib/python3.10/site-packages (from folium) (2022.9.0) Requirement already satisfied: MarkupSafe>=2.0 in /opt/conda/envs/anaconda-2024.02-py310/lib/python3.10/site-packages (from jinja2>=2.9->folium) (2.1.3) Requirement already satisfied: charset-normalizer<4,>=2 in /opt/conda/envs/anaconda-2024.02-py310/lib/python3.10/site-packages (from requests->folium) (2.0.4) Requirement already satisfied: idna<4,>=2.5 in /opt/conda/envs/anaconda-2024.02-py310/lib/python3.10/site-packages (from requests->folium) (3.4) Requirement already satisfied: urllib3<3,>=1.21.1 in /opt/conda/envs/anaconda-2024.02-py310/lib/python3.10/site-packages (from requests->folium) (2.0.7) Requirement already satisfied: certifi>=2017.4.17 in /opt/conda/envs/anaconda-2024.02-py310/lib/python3.10/site-packages (from requests->folium) (2024.2.2)
[1]:
!pip install folium
Defaulting to user installation because normal site-packages is not writeable Looking in links: /usr/share/pip-wheels Requirement already satisfied: folium in ./.local/lib/python3.10/site-packages (0.18.0) Requirement already satisfied: branca>=0.6.0 in ./.local/lib/python3.10/site-packages (from folium) (0.8.0) Requirement already satisfied: jinja2>=2.9 in /opt/conda/envs/anaconda-2024.02-py310/lib/python3.10/site-packages (from folium) (3.1.3) Requirement already satisfied: numpy in /opt/conda/envs/anaconda-2024.02-py310/lib/python3.10/site-packages (from folium) (1.26.4) Requirement already satisfied: requests in /opt/conda/envs/anaconda-2024.02-py310/lib/python3.10/site-packages (from folium) (2.31.0) Requirement already satisfied: xyzservices in /opt/conda/envs/anaconda-2024.02-py310/lib/python3.10/site-packages (from folium) (2022.9.0) Requirement already satisfied: MarkupSafe>=2.0 in /opt/conda/envs/anaconda-2024.02-py310/lib/python3.10/site-packages (from jinja2>=2.9->folium) (2.1.3) Requirement already satisfied: charset-normalizer<4,>=2 in /opt/conda/envs/anaconda-2024.02-py310/lib/python3.10/site-packages (from requests->folium) (2.0.4) Requirement already satisfied: idna<4,>=2.5 in /opt/conda/envs/anaconda-2024.02-py310/lib/python3.10/site-packages (from requests->folium) (3.4) Requirement already satisfied: urllib3<3,>=1.21.1 in /opt/conda/envs/anaconda-2024.02-py310/lib/python3.10/site-packages (from requests->folium) (2.0.7) Requirement already satisfied: certifi>=2017.4.17 in /opt/conda/envs/anaconda-2024.02-py310/lib/python3.10/site-packages (from requests->folium) (2024.2.2)
[2]:
import folium
print(folium.__version__)
0.18.0
[5]:
import pandas as pd
import folium
# Sample dataset (replace with actual data)
cities = {
'City': ['New York', 'Los Angeles', 'Chicago', 'Houston', 'Phoenix'],
'Latitude': [40.7128, 34.0522, 41.8781, 29.7604, 33.4484],
'Longitude': [-74.0060, -118.2437, -87.6298, -95.3698, -112.0740],
'Sales': [1000, 1500, 1200, 1300, 1100]
}
# Convert the dictionary to a DataFrame
df_cities = pd.DataFrame(cities)
# Create a map centered around the US
m = folium.Map(location=[39.8283, -98.5795], zoom_start=4)
# Add markers for each city on the map
for _, city in df_cities.iterrows():
folium.Marker(
location=[city['Latitude'], city['Longitude']],
popup=f"City: {city['City']}<br>Sales: {city['Sales']}",
).add_to(m)
# Display the map
m.save("sales_map.html")
[6]:
from IPython.display import IFrame
# Display the saved HTML map in the notebook
IFrame('sales_map.html', width=700, height=500)
[6]:
[1]:
import folium
import pandas as pd
from IPython.display import IFrame # Import IFrame for displaying the map inline
# Updated dataset with Africa included
data = {
'Country': ['USA', 'China', 'India', 'Brazil', 'Germany', 'UK', 'France', 'Italy', 'Spain', 'Australia', 'Nigeria', 'South Africa'],
'Population': [331002651, 1439323776, 1380004385, 212559417, 83783942, 67886011, 65273511, 60244639, 46719142, 25499884, 206139589, 59308690],
'Region': ['North America', 'Asia', 'Asia', 'South America', 'Europe', 'Europe', 'Europe', 'Europe', 'Europe', 'Oceania', 'Africa', 'Africa'],
'Latitude': [37.0902, 35.8617, 20.5937, -14.2350, 51.1657, 55.3781, 46.6034, 41.8719, 40.4637, -25.2744, 9.0820, -30.5595],
'Longitude': [-95.7129, 104.1954, 78.9629, -51.9253, 10.4515, -3.4360, 2.2137, 12.5674, -3.7492, 133.7751, 8.6753, 22.9375]
}
# Convert to DataFrame
df = pd.DataFrame(data)
# Create a base map centered around a location with an appropriate zoom level
world_map = folium.Map(location=[20, 0], zoom_start=2)
# Add population circles to the map with tooltips
for idx, row in df.iterrows():
folium.CircleMarker(
location=[row['Latitude'], row['Longitude']],
radius=row['Population'] ** 0.5 / 1000, # Scale radius by population for better visualization
color='blue',
fill=True,
fill_color='blue',
fill_opacity=0.7,
tooltip=f"{row['Country']}<br>Population: {row['Population']:,}" # Tooltip for hover effect
).add_to(world_map)
# Save map as an HTML file
world_map.save("population_map.html")
# Display map inline in Jupyter
IFrame("population_map.html", width=700, height=500)
[1]:
[8]:
!pip install geopandas
Defaulting to user installation because normal site-packages is not writeable Looking in links: /usr/share/pip-wheels Collecting geopandas Downloading geopandas-1.0.1-py3-none-any.whl.metadata (2.2 kB) Requirement already satisfied: numpy>=1.22 in /opt/conda/envs/anaconda-2024.02-py310/lib/python3.10/site-packages (from geopandas) (1.26.4) Collecting pyogrio>=0.7.2 (from geopandas) Downloading pyogrio-0.10.0-cp310-cp310-manylinux_2_28_x86_64.whl.metadata (5.5 kB) Requirement already satisfied: packaging in /opt/conda/envs/anaconda-2024.02-py310/lib/python3.10/site-packages (from geopandas) (23.2) Requirement already satisfied: pandas>=1.4.0 in /opt/conda/envs/anaconda-2024.02-py310/lib/python3.10/site-packages (from geopandas) (2.1.4) Collecting pyproj>=3.3.0 (from geopandas) Downloading pyproj-3.7.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (31 kB) Collecting shapely>=2.0.0 (from geopandas) Downloading shapely-2.0.6-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (7.0 kB) Requirement already satisfied: python-dateutil>=2.8.2 in /opt/conda/envs/anaconda-2024.02-py310/lib/python3.10/site-packages (from pandas>=1.4.0->geopandas) (2.8.2) Requirement already satisfied: pytz>=2020.1 in /opt/conda/envs/anaconda-2024.02-py310/lib/python3.10/site-packages (from pandas>=1.4.0->geopandas) (2023.3.post1) Requirement already satisfied: tzdata>=2022.1 in /opt/conda/envs/anaconda-2024.02-py310/lib/python3.10/site-packages (from pandas>=1.4.0->geopandas) (2023.3) Requirement already satisfied: certifi in /opt/conda/envs/anaconda-2024.02-py310/lib/python3.10/site-packages (from pyogrio>=0.7.2->geopandas) (2024.2.2) Requirement already satisfied: six>=1.5 in /opt/conda/envs/anaconda-2024.02-py310/lib/python3.10/site-packages (from python-dateutil>=2.8.2->pandas>=1.4.0->geopandas) (1.16.0) Downloading geopandas-1.0.1-py3-none-any.whl (323 kB) ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 323.6/323.6 kB 7.7 MB/s eta 0:00:00 0:00:01 Downloading pyogrio-0.10.0-cp310-cp310-manylinux_2_28_x86_64.whl (23.9 MB) ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 23.9/23.9 MB 47.3 MB/s eta 0:00:00 0:00:01[36m0:00:01 Downloading pyproj-3.7.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (9.2 MB) ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 9.2/9.2 MB 85.3 MB/s eta 0:00:00 0:00:01 Downloading shapely-2.0.6-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.5 MB) ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 2.5/2.5 MB 44.3 MB/s eta 0:00:00 0:00:01 Installing collected packages: shapely, pyproj, pyogrio, geopandas WARNING: The script pyproj is installed in '/home/6f348dcd-6f88-48fa-ab3c-868ada9b6b8b/.local/bin' which is not on PATH. Consider adding this directory to PATH or, if you prefer to suppress this warning, use --no-warn-script-location. Successfully installed geopandas-1.0.1 pyogrio-0.10.0 pyproj-3.7.0 shapely-2.0.6
[9]:
import geopandas as gpd
print(gpd.__version__) # Print version to verify
1.0.1
[28]:
import os
print(os.path.abspath("C:/Users/HP USER/Downloads/Countries/ne_10m_admin_0_countries.shp"))
/home/6f348dcd-6f88-48fa-ab3c-868ada9b6b8b/C:/Users/HP USER/Downloads/Countries/ne_10m_admin_0_countries.shp
[30]:
import os
print(os.getcwd()) # This shows the current working directory
/home/6f348dcd-6f88-48fa-ab3c-868ada9b6b8b
[31]:
# Assuming you uploaded the files to the correct path in the working directory
shapefile_path = '/home/6f348dcd-6f88-48fa-ab3c-868ada9b6b8b/ne_10m_admin_0_countries.shp'
world = gpd.read_file(shapefile_path)
print(world.head())
featurecla scalerank LABELRANK SOVEREIGNT SOV_A3 ADM0_DIF LEVEL \
0 Admin-0 country 0 2 Indonesia IDN 0 2
1 Admin-0 country 0 3 Malaysia MYS 0 2
2 Admin-0 country 0 2 Chile CHL 0 2
3 Admin-0 country 0 3 Bolivia BOL 0 2
4 Admin-0 country 0 2 Peru PER 0 2
TYPE TLC ADMIN ... FCLASS_TR FCLASS_ID FCLASS_PL \
0 Sovereign country 1 Indonesia ... None None None
1 Sovereign country 1 Malaysia ... None None None
2 Sovereign country 1 Chile ... None None None
3 Sovereign country 1 Bolivia ... None None None
4 Sovereign country 1 Peru ... None None None
FCLASS_GR FCLASS_IT FCLASS_NL FCLASS_SE FCLASS_BD FCLASS_UA \
0 None None None None None None
1 None None None None None None
2 None None None None None None
3 None None None None None None
4 None None None None None None
geometry
0 MULTIPOLYGON (((117.70361 4.16341, 117.70361 4...
1 MULTIPOLYGON (((117.70361 4.16341, 117.69711 4...
2 MULTIPOLYGON (((-69.51009 -17.50659, -69.50611...
3 POLYGON ((-69.51009 -17.50659, -69.51009 -17.5...
4 MULTIPOLYGON (((-69.51009 -17.50659, -69.63832...
[5 rows x 169 columns]
[39]:
# Check the geometry types
print(world.geometry.geom_type.value_counts())
MultiPolygon 7 Polygon 2 Name: count, dtype: int64
[40]:
from shapely.geometry import Polygon, MultiPolygon
# Apply buffer(0) to invalid geometries of type Polygon and MultiPolygon
world['geometry'] = world['geometry'].apply(
lambda geom: geom.buffer(0) if isinstance(geom, (Polygon, MultiPolygon)) and not geom.is_valid else geom
)
# Verify again if the geometries are valid
print(world.is_valid)
# Plot the world map after fixing geometries
world.plot(figsize=(10, 10))
plt.show()
0 True
1 True
2 True
3 True
4 True
...
253 False
254 False
255 False
256 False
257 False
Length: 258, dtype: bool
[41]:
# Find and inspect invalid geometries
invalid_geometries = world[~world.is_valid]
print(invalid_geometries)
# Optional: Inspect the geometries in detail
for idx, row in invalid_geometries.iterrows():
print(f"Invalid geometry at index {idx}: {row['geometry']}")
featurecla scalerank LABELRANK SOVEREIGNT \
9 Admin-0 country 0 2 China
10 Admin-0 country 0 4 Israel
11 Admin-0 country 0 5 Israel
12 Admin-0 country 0 5 Lebanon
13 Admin-0 country 0 2 Ethiopia
.. ... ... ... ...
253 Admin-0 country 0 4 China
254 Admin-0 country 6 5 Australia
255 Admin-0 country 6 8 Bajo Nuevo Bank (Petrel Is.)
256 Admin-0 country 6 5 Serranilla Bank
257 Admin-0 country 6 6 Scarborough Reef
SOV_A3 ADM0_DIF LEVEL TYPE TLC \
9 CH1 1 2 Country 1
10 IS1 1 2 Disputed 1
11 IS1 1 2 Indeterminate 1
12 LBN 0 2 Sovereign country 1
13 ETH 0 2 Sovereign country 1
.. ... ... ... ... ..
253 CH1 1 2 Country 1
254 AU1 1 2 Dependency 1
255 BJN 0 2 Indeterminate 1
256 SER 0 2 Indeterminate 1
257 SCR 0 2 Indeterminate 1
ADMIN ... FCLASS_TR FCLASS_ID \
9 China ... None None
10 Israel ... None None
11 Palestine ... None None
12 Lebanon ... None None
13 Ethiopia ... None None
.. ... ... ... ...
253 Macao S.A.R ... None None
254 Ashmore and Cartier Islands ... None None
255 Bajo Nuevo Bank (Petrel Is.) ... Unrecognized Unrecognized
256 Serranilla Bank ... Unrecognized Unrecognized
257 Scarborough Reef ... None None
FCLASS_PL FCLASS_GR FCLASS_IT FCLASS_NL FCLASS_SE \
9 None None None None None
10 None None None None None
11 None None None None None
12 None None None None None
13 None None None None None
.. ... ... ... ... ...
253 None None None None None
254 None None None None None
255 Unrecognized Unrecognized Unrecognized Unrecognized Unrecognized
256 Unrecognized Unrecognized Unrecognized Unrecognized Unrecognized
257 None None None None None
FCLASS_BD FCLASS_UA geometry
9 None None None
10 Unrecognized None None
11 Admin-0 country None None
12 None None None
13 None None None
.. ... ... ...
253 None None None
254 None None None
255 Unrecognized Unrecognized None
256 Unrecognized Unrecognized None
257 None None None
[249 rows x 169 columns]
Invalid geometry at index 9: None
Invalid geometry at index 10: None
Invalid geometry at index 11: None
Invalid geometry at index 12: None
Invalid geometry at index 13: None
Invalid geometry at index 14: None
Invalid geometry at index 15: None
Invalid geometry at index 16: None
Invalid geometry at index 17: None
Invalid geometry at index 18: None
Invalid geometry at index 19: None
Invalid geometry at index 20: None
Invalid geometry at index 21: None
Invalid geometry at index 22: None
Invalid geometry at index 23: None
Invalid geometry at index 24: None
Invalid geometry at index 25: None
Invalid geometry at index 26: None
Invalid geometry at index 27: None
Invalid geometry at index 28: None
Invalid geometry at index 29: None
Invalid geometry at index 30: None
Invalid geometry at index 31: None
Invalid geometry at index 32: None
Invalid geometry at index 33: None
Invalid geometry at index 34: None
Invalid geometry at index 35: None
Invalid geometry at index 36: None
Invalid geometry at index 37: None
Invalid geometry at index 38: None
Invalid geometry at index 39: None
Invalid geometry at index 40: None
Invalid geometry at index 41: None
Invalid geometry at index 42: None
Invalid geometry at index 43: None
Invalid geometry at index 44: None
Invalid geometry at index 45: None
Invalid geometry at index 46: None
Invalid geometry at index 47: None
Invalid geometry at index 48: None
Invalid geometry at index 49: None
Invalid geometry at index 50: None
Invalid geometry at index 51: None
Invalid geometry at index 52: None
Invalid geometry at index 53: None
Invalid geometry at index 54: None
Invalid geometry at index 55: None
Invalid geometry at index 56: None
Invalid geometry at index 57: None
Invalid geometry at index 58: None
Invalid geometry at index 59: None
Invalid geometry at index 60: None
Invalid geometry at index 61: None
Invalid geometry at index 62: None
Invalid geometry at index 63: None
Invalid geometry at index 64: None
Invalid geometry at index 65: None
Invalid geometry at index 66: None
Invalid geometry at index 67: None
Invalid geometry at index 68: None
Invalid geometry at index 69: None
Invalid geometry at index 70: None
Invalid geometry at index 71: None
Invalid geometry at index 72: None
Invalid geometry at index 73: None
Invalid geometry at index 74: None
Invalid geometry at index 75: None
Invalid geometry at index 76: None
Invalid geometry at index 77: None
Invalid geometry at index 78: None
Invalid geometry at index 79: None
Invalid geometry at index 80: None
Invalid geometry at index 81: None
Invalid geometry at index 82: None
Invalid geometry at index 83: None
Invalid geometry at index 84: None
Invalid geometry at index 85: None
Invalid geometry at index 86: None
Invalid geometry at index 87: None
Invalid geometry at index 88: None
Invalid geometry at index 89: None
Invalid geometry at index 90: None
Invalid geometry at index 91: None
Invalid geometry at index 92: None
Invalid geometry at index 93: None
Invalid geometry at index 94: None
Invalid geometry at index 95: None
Invalid geometry at index 96: None
Invalid geometry at index 97: None
Invalid geometry at index 98: None
Invalid geometry at index 99: None
Invalid geometry at index 100: None
Invalid geometry at index 101: None
Invalid geometry at index 102: None
Invalid geometry at index 103: None
Invalid geometry at index 104: None
Invalid geometry at index 105: None
Invalid geometry at index 106: None
Invalid geometry at index 107: None
Invalid geometry at index 108: None
Invalid geometry at index 109: None
Invalid geometry at index 110: None
Invalid geometry at index 111: None
Invalid geometry at index 112: None
Invalid geometry at index 113: None
Invalid geometry at index 114: None
Invalid geometry at index 115: None
Invalid geometry at index 116: None
Invalid geometry at index 117: None
Invalid geometry at index 118: None
Invalid geometry at index 119: None
Invalid geometry at index 120: None
Invalid geometry at index 121: None
Invalid geometry at index 122: None
Invalid geometry at index 123: None
Invalid geometry at index 124: None
Invalid geometry at index 125: None
Invalid geometry at index 126: None
Invalid geometry at index 127: None
Invalid geometry at index 128: None
Invalid geometry at index 129: None
Invalid geometry at index 130: None
Invalid geometry at index 131: None
Invalid geometry at index 132: None
Invalid geometry at index 133: None
Invalid geometry at index 134: None
Invalid geometry at index 135: None
Invalid geometry at index 136: None
Invalid geometry at index 137: None
Invalid geometry at index 138: None
Invalid geometry at index 139: None
Invalid geometry at index 140: None
Invalid geometry at index 141: None
Invalid geometry at index 142: None
Invalid geometry at index 143: None
Invalid geometry at index 144: None
Invalid geometry at index 145: None
Invalid geometry at index 146: None
Invalid geometry at index 147: None
Invalid geometry at index 148: None
Invalid geometry at index 149: None
Invalid geometry at index 150: None
Invalid geometry at index 151: None
Invalid geometry at index 152: None
Invalid geometry at index 153: None
Invalid geometry at index 154: None
Invalid geometry at index 155: None
Invalid geometry at index 156: None
Invalid geometry at index 157: None
Invalid geometry at index 158: None
Invalid geometry at index 159: None
Invalid geometry at index 160: None
Invalid geometry at index 161: None
Invalid geometry at index 162: None
Invalid geometry at index 163: None
Invalid geometry at index 164: None
Invalid geometry at index 165: None
Invalid geometry at index 166: None
Invalid geometry at index 167: None
Invalid geometry at index 168: None
Invalid geometry at index 169: None
Invalid geometry at index 170: None
Invalid geometry at index 171: None
Invalid geometry at index 172: None
Invalid geometry at index 173: None
Invalid geometry at index 174: None
Invalid geometry at index 175: None
Invalid geometry at index 176: None
Invalid geometry at index 177: None
Invalid geometry at index 178: None
Invalid geometry at index 179: None
Invalid geometry at index 180: None
Invalid geometry at index 181: None
Invalid geometry at index 182: None
Invalid geometry at index 183: None
Invalid geometry at index 184: None
Invalid geometry at index 185: None
Invalid geometry at index 186: None
Invalid geometry at index 187: None
Invalid geometry at index 188: None
Invalid geometry at index 189: None
Invalid geometry at index 190: None
Invalid geometry at index 191: None
Invalid geometry at index 192: None
Invalid geometry at index 193: None
Invalid geometry at index 194: None
Invalid geometry at index 195: None
Invalid geometry at index 196: None
Invalid geometry at index 197: None
Invalid geometry at index 198: None
Invalid geometry at index 199: None
Invalid geometry at index 200: None
Invalid geometry at index 201: None
Invalid geometry at index 202: None
Invalid geometry at index 203: None
Invalid geometry at index 204: None
Invalid geometry at index 205: None
Invalid geometry at index 206: None
Invalid geometry at index 207: None
Invalid geometry at index 208: None
Invalid geometry at index 209: None
Invalid geometry at index 210: None
Invalid geometry at index 211: None
Invalid geometry at index 212: None
Invalid geometry at index 213: None
Invalid geometry at index 214: None
Invalid geometry at index 215: None
Invalid geometry at index 216: None
Invalid geometry at index 217: None
Invalid geometry at index 218: None
Invalid geometry at index 219: None
Invalid geometry at index 220: None
Invalid geometry at index 221: None
Invalid geometry at index 222: None
Invalid geometry at index 223: None
Invalid geometry at index 224: None
Invalid geometry at index 225: None
Invalid geometry at index 226: None
Invalid geometry at index 227: None
Invalid geometry at index 228: None
Invalid geometry at index 229: None
Invalid geometry at index 230: None
Invalid geometry at index 231: None
Invalid geometry at index 232: None
Invalid geometry at index 233: None
Invalid geometry at index 234: None
Invalid geometry at index 235: None
Invalid geometry at index 236: None
Invalid geometry at index 237: None
Invalid geometry at index 238: None
Invalid geometry at index 239: None
Invalid geometry at index 240: None
Invalid geometry at index 241: None
Invalid geometry at index 242: None
Invalid geometry at index 243: None
Invalid geometry at index 244: None
Invalid geometry at index 245: None
Invalid geometry at index 246: None
Invalid geometry at index 247: None
Invalid geometry at index 248: None
Invalid geometry at index 249: None
Invalid geometry at index 250: None
Invalid geometry at index 251: None
Invalid geometry at index 252: None
Invalid geometry at index 253: None
Invalid geometry at index 254: None
Invalid geometry at index 255: None
Invalid geometry at index 256: None
Invalid geometry at index 257: None
[42]:
from shapely.geometry import Polygon
# Replace None geometries with an empty Polygon
world['geometry'] = world['geometry'].apply(lambda geom: Polygon() if geom is None else geom)
# Verify all geometries are now valid
print(world.is_valid)
# Plot the map
world.plot(figsize=(10, 10))
plt.show()
0 True
1 True
2 True
3 True
4 True
...
253 True
254 True
255 True
256 True
257 True
Length: 258, dtype: bool
[47]:
# Remove invalid geometries from the dataset
valid_world = world[world.is_valid]
# Save the cleaned dataset
valid_file_path = '/home/6f348dcd-6f88-48fa-ab3c-868ada9b6b8b/valid_ne_10m_admin_0_countries.shp'
valid_world.to_file(valid_file_path)
print(f"Cleaned shapefile saved to: {valid_file_path}")
/home/6f348dcd-6f88-48fa-ab3c-868ada9b6b8b/.local/lib/python3.10/site-packages/pyogrio/geopandas.py:662: UserWarning: 'crs' was not provided. The output dataset will not have projection information defined and may not be usable in other systems. write( /home/6f348dcd-6f88-48fa-ab3c-868ada9b6b8b/.local/lib/python3.10/site-packages/pyogrio/raw.py:723: RuntimeWarning: Value 270625568 of field POP_EST of feature 0 not successfully written. Possibly due to too larger number with respect to field width ogr_write( /home/6f348dcd-6f88-48fa-ab3c-868ada9b6b8b/.local/lib/python3.10/site-packages/pyogrio/raw.py:723: RuntimeWarning: Value 1366417754 of field POP_EST of feature 8 not successfully written. Possibly due to too larger number with respect to field width ogr_write( /home/6f348dcd-6f88-48fa-ab3c-868ada9b6b8b/.local/lib/python3.10/site-packages/pyogrio/raw.py:723: RuntimeWarning: Value 1397715000 of field POP_EST of feature 9 not successfully written. Possibly due to too larger number with respect to field width ogr_write( /home/6f348dcd-6f88-48fa-ab3c-868ada9b6b8b/.local/lib/python3.10/site-packages/pyogrio/raw.py:723: RuntimeWarning: Value 112078730 of field POP_EST of feature 13 not successfully written. Possibly due to too larger number with respect to field width ogr_write( /home/6f348dcd-6f88-48fa-ab3c-868ada9b6b8b/.local/lib/python3.10/site-packages/pyogrio/raw.py:723: RuntimeWarning: Value 211049527 of field POP_EST of feature 44 not successfully written. Possibly due to too larger number with respect to field width ogr_write( /home/6f348dcd-6f88-48fa-ab3c-868ada9b6b8b/.local/lib/python3.10/site-packages/pyogrio/raw.py:723: RuntimeWarning: Value 144373535 of field POP_EST of feature 47 not successfully written. Possibly due to too larger number with respect to field width ogr_write( /home/6f348dcd-6f88-48fa-ab3c-868ada9b6b8b/.local/lib/python3.10/site-packages/pyogrio/raw.py:723: RuntimeWarning: Value 200963599 of field POP_EST of feature 99 not successfully written. Possibly due to too larger number with respect to field width ogr_write( /home/6f348dcd-6f88-48fa-ab3c-868ada9b6b8b/.local/lib/python3.10/site-packages/pyogrio/raw.py:723: RuntimeWarning: Value 216565318 of field POP_EST of feature 108 not successfully written. Possibly due to too larger number with respect to field width ogr_write( /home/6f348dcd-6f88-48fa-ab3c-868ada9b6b8b/.local/lib/python3.10/site-packages/pyogrio/raw.py:723: RuntimeWarning: Value 163046161 of field POP_EST of feature 127 not successfully written. Possibly due to too larger number with respect to field width ogr_write( /home/6f348dcd-6f88-48fa-ab3c-868ada9b6b8b/.local/lib/python3.10/site-packages/pyogrio/raw.py:723: RuntimeWarning: Value 328239523 of field POP_EST of feature 154 not successfully written. Possibly due to too larger number with respect to field width ogr_write( /home/6f348dcd-6f88-48fa-ab3c-868ada9b6b8b/.local/lib/python3.10/site-packages/pyogrio/raw.py:723: RuntimeWarning: Value 127575529 of field POP_EST of feature 156 not successfully written. Possibly due to too larger number with respect to field width ogr_write( /home/6f348dcd-6f88-48fa-ab3c-868ada9b6b8b/.local/lib/python3.10/site-packages/pyogrio/raw.py:723: RuntimeWarning: Value 100388073 of field POP_EST of feature 161 not successfully written. Possibly due to too larger number with respect to field width ogr_write( /home/6f348dcd-6f88-48fa-ab3c-868ada9b6b8b/.local/lib/python3.10/site-packages/pyogrio/raw.py:723: RuntimeWarning: Value 108116615 of field POP_EST of feature 182 not successfully written. Possibly due to too larger number with respect to field width ogr_write( /home/6f348dcd-6f88-48fa-ab3c-868ada9b6b8b/.local/lib/python3.10/site-packages/pyogrio/raw.py:723: RuntimeWarning: Value 126264931 of field POP_EST of feature 189 not successfully written. Possibly due to too larger number with respect to field width ogr_write( /home/6f348dcd-6f88-48fa-ab3c-868ada9b6b8b/.local/lib/python3.10/site-packages/pyogrio/raw.py:723: RuntimeWarning: Value 'মারà§à¦à¦¿à¦¨ যà§à¦à§à¦¤à¦°à¦¾à¦·à§à¦à§à¦° পারà§à¦¶à§à¦¬à¦¬à¦°à§à¦¤à§ à¦à§à¦·à§à¦¦à§à¦° দà§à¦¬à§à¦ªà¦ªà§à¦à§à¦' of field NAME_BN has been truncated to 254 characters. This warning will not be emitted any more for that layer. ogr_write(
Cleaned shapefile saved to: /home/6f348dcd-6f88-48fa-ab3c-868ada9b6b8b/valid_ne_10m_admin_0_countries.shp
[48]:
import geopandas as gpd
import matplotlib.pyplot as plt
# Load the cleaned shapefile
cleaned_path = '/home/6f348dcd-6f88-48fa-ab3c-868ada9b6b8b/valid_ne_10m_admin_0_countries.shp'
cleaned_world = gpd.read_file(cleaned_path)
# Verify the dataset
print(cleaned_world.head())
print(cleaned_world.is_valid.value_counts()) # Ensure no invalid geometries remain
# Plot the cleaned dataset
cleaned_world.plot(figsize=(12, 8), color='lightblue', edgecolor='black')
plt.title('World Map with Cleaned Geometries')
plt.show()
featurecla scalerank LABELRANK SOVEREIGNT SOV_A3 ADM0_DIF LEVEL \
0 Admin-0 country 0 2 Indonesia IDN 0 2
1 Admin-0 country 0 3 Malaysia MYS 0 2
2 Admin-0 country 0 2 Chile CHL 0 2
3 Admin-0 country 0 3 Bolivia BOL 0 2
4 Admin-0 country 0 2 Peru PER 0 2
TYPE TLC ADMIN ... FCLASS_TR FCLASS_ID FCLASS_PL \
0 Sovereign country 1 Indonesia ... None None None
1 Sovereign country 1 Malaysia ... None None None
2 Sovereign country 1 Chile ... None None None
3 Sovereign country 1 Bolivia ... None None None
4 Sovereign country 1 Peru ... None None None
FCLASS_GR FCLASS_IT FCLASS_NL FCLASS_SE FCLASS_BD FCLASS_UA \
0 None None None None None None
1 None None None None None None
2 None None None None None None
3 None None None None None None
4 None None None None None None
geometry
0 MULTIPOLYGON (((117.70361 4.16341, 117.70361 4...
1 MULTIPOLYGON (((117.70361 4.16341, 117.69711 4...
2 MULTIPOLYGON (((-69.51009 -17.50659, -69.50611...
3 POLYGON ((-69.51009 -17.50659, -69.51009 -17.5...
4 MULTIPOLYGON (((-69.51009 -17.50659, -69.63832...
[5 rows x 169 columns]
False 249
True 9
Name: count, dtype: int64
[49]:
import geopandas as gpd
from shapely.geometry import Point
import matplotlib.pyplot as plt
# Step 2: Load and verify cleaned data
cleaned_path = '/home/6f348dcd-6f88-48fa-ab3c-868ada9b6b8b/valid_ne_10m_admin_0_countries.shp'
cleaned_world = gpd.read_file(cleaned_path)
# Verify the dataset
print(cleaned_world.head())
print("Valid geometries count:\n", cleaned_world.is_valid.value_counts()) # Ensure all geometries are valid
# Step 3: Sample event data
events = {
'Event': ['Event A', 'Event B', 'Event C'],
'Latitude': [40.7128, 34.0522, 41.8781],
'Longitude': [-74.0060, -118.2437, -87.6298]
}
df_events = gpd.GeoDataFrame(
events,
geometry=[Point(xy) for xy in zip(events['Longitude'], events['Latitude'])],
crs="EPSG:4326" # Match CRS of world data
)
# Plot the cleaned world map with events overlay
fig, ax = plt.subplots(figsize=(12, 8))
cleaned_world.plot(ax=ax, color='lightblue', edgecolor='black')
df_events.plot(ax=ax, color='red', marker='o', label='Event Locations', alpha=0.8)
plt.title('World Map with Cleaned Geometries and Event Locations')
plt.legend()
plt.show()
featurecla scalerank LABELRANK SOVEREIGNT SOV_A3 ADM0_DIF LEVEL \
0 Admin-0 country 0 2 Indonesia IDN 0 2
1 Admin-0 country 0 3 Malaysia MYS 0 2
2 Admin-0 country 0 2 Chile CHL 0 2
3 Admin-0 country 0 3 Bolivia BOL 0 2
4 Admin-0 country 0 2 Peru PER 0 2
TYPE TLC ADMIN ... FCLASS_TR FCLASS_ID FCLASS_PL \
0 Sovereign country 1 Indonesia ... None None None
1 Sovereign country 1 Malaysia ... None None None
2 Sovereign country 1 Chile ... None None None
3 Sovereign country 1 Bolivia ... None None None
4 Sovereign country 1 Peru ... None None None
FCLASS_GR FCLASS_IT FCLASS_NL FCLASS_SE FCLASS_BD FCLASS_UA \
0 None None None None None None
1 None None None None None None
2 None None None None None None
3 None None None None None None
4 None None None None None None
geometry
0 MULTIPOLYGON (((117.70361 4.16341, 117.70361 4...
1 MULTIPOLYGON (((117.70361 4.16341, 117.69711 4...
2 MULTIPOLYGON (((-69.51009 -17.50659, -69.50611...
3 POLYGON ((-69.51009 -17.50659, -69.51009 -17.5...
4 MULTIPOLYGON (((-69.51009 -17.50659, -69.63832...
[5 rows x 169 columns]
Valid geometries count:
False 249
True 9
Name: count, dtype: int64
[51]:
# 8. Visual Comparison of Numerical and Categorical Variables by Region
import plotly.express as px
import pandas as pd
# Sample dataset with numerical and categorical variables (Population and Region)
data = {
'Country': ['USA', 'China', 'India', 'Brazil', 'Germany', 'UK', 'France', 'Italy', 'Spain', 'Australia'],
'Population': [331002651, 1439323776, 1380004385, 212559417, 83783942, 67886011, 65273511, 60244639, 46719142, 25499884],
'Region': ['North America', 'Asia', 'Asia', 'South America', 'Europe', 'Europe', 'Europe', 'Europe', 'Europe', 'Oceania']
}
# Convert to DataFrame
df = pd.DataFrame(data)
# Visualizing Population by Region on a world map
fig = px.choropleth(df,
locations="Country",
locationmode="country names", # Specify countries
color="Population", # Numerical variable
hover_name="Country", # Show country name on hover
color_continuous_scale="Viridis", # Color scale
animation_frame="Region", # Animate by region
title="World Map: Population by Region"
)
# Show the plot
fig.show()
[53]:
# Day 3: Visualizing Network Data
# Task 9: Network Graph Visualization
# Objective:Create a network graph using `networkx
import networkx as nx
import matplotlib.pyplot as plt
# Create a simple graph
G = nx.Graph()
# Add nodes and edges
G.add_edges_from([(1, 2), (2, 3), (3, 4), (4, 1)])
# Draw the graph
nx.draw(G, with_labels=True)
plt.show()
[56]:
# Task 9b: Visualizing a Network of Friendships
import networkx as nx
import matplotlib.pyplot as plt
# Create a graph object
G = nx.Graph()
# Add nodes (people)
G.add_nodes_from(['Alice', 'Bob', 'Charlie', 'David', 'Eve'])
# Add edges (friendships) between people
G.add_edges_from([('Alice', 'Bob'),
('Alice', 'Charlie'),
('Bob', 'David'),
('Charlie', 'David'),
('Eve', 'Charlie')])
# Draw the network
plt.figure(figsize=(8, 6))
nx.draw(G, with_labels=True, node_color='skyblue', node_size=2000, font_size=14, font_weight='bold', edge_color='gray')
# Display the plot
plt.title("Social Network of Friendships")
plt.show()
[54]:
# Task 10: Visualizing Central Nodes in a Social Network
# Objective:Identify and highlight central nodes (influencers) in a network
# Compute degree centrality
centrality = nx.degree_centrality(G)
# Draw the graph with nodes sized by centrality
node_sizes = [centrality[node] * 1000 for node in G.nodes]
nx.draw(G, with_labels=True, node_size=node_sizes, node_color='skyblue')
plt.show()
[58]:
# Task 10b: Visualizing Central Nodes (Influencers)
# Objective: The goal is to identify and highlight central nodes (influencers) in the network
import networkx as nx
import matplotlib.pyplot as plt
# Create a graph object
G = nx.Graph()
# Add nodes and edges (friendships or interactions)
G.add_edges_from([('Alice', 'Bob'),
('Alice', 'Charlie'),
('Bob', 'David'),
('Charlie', 'David'),
('Eve', 'Charlie'),
('Charlie', 'Frank')])
# Calculate degree centrality
centrality = nx.degree_centrality(G)
# Get the node with the highest centrality
most_central_node = max(centrality, key=centrality.get)
# Draw the network
plt.figure(figsize=(7, 6))
nx.draw(G, with_labels=True, node_size=2000, node_color='skyblue', font_size=14, font_weight='bold', edge_color='gray')
# Highlight the most central node
plt.title(f"Most Central Node: {most_central_node}")
plt.show()
# Print centrality values
print("Degree Centrality of Nodes:")
for node, value in centrality.items():
print(f"{node}: {value:.2f}")
Degree Centrality of Nodes: Alice: 0.40 Bob: 0.40 Charlie: 0.80 David: 0.40 Eve: 0.20 Frank: 0.20
[10]:
# To help resolve the issue with `community` not being found, let's identify the current environment and verify the
# presence of the `community` package. Let's check the installed packages and see if `community` (or python-louvain) is listed.
!pip list | grep "community\|python-louvain"
[11]:
import community as community_louvain
print("python-louvain module loaded successfully")
--------------------------------------------------------------------------- ModuleNotFoundError Traceback (most recent call last) Cell In[11], line 1 ----> 1 import community as community_louvain 2 print("python-louvain module loaded successfully") ModuleNotFoundError: No module named 'community'
[8]:
# Task 11: Detecting Communities in a Social Network**
# Objective: To identify communities or clusters within a social network
import networkx as nx
import matplotlib.pyplot as plt
import community as community_louvain
# Create a graph object
G = nx.Graph()
# Add nodes and edges (friendships or interactions)
G.add_edges_from([('Alice', 'Bob'),
('Alice', 'Charlie'),
('Bob', 'David'),
('Charlie', 'David'),
('Eve', 'Charlie'),
('Charlie', 'Frank')])
# Use the Louvain method for community detection
partition = community_louvain.best_partition(G)
# Draw the network with communities colored
plt.figure(figsize=(8, 8))
# Get community colors
colors = [partition[node] for node in G.nodes()]
# Draw nodes and edges with colors based on communities
nx.draw(G, with_labels=True, node_size=2000, node_color=colors, cmap=plt.cm.rainbow, font_size=14, font_weight='bold', edge_color='gray')
plt.title("Community Detection in Network")
plt.show()
# Print the detected communities
print("Detected Communities:")
for node, comm in partition.items():
print(f"{node}: Community {comm}")
--------------------------------------------------------------------------- ModuleNotFoundError Traceback (most recent call last) Cell In[8], line 5 3 import networkx as nx 4 import matplotlib.pyplot as plt ----> 5 import community as community_louvain 7 # Create a graph object 8 G = nx.Graph() ModuleNotFoundError: No module named 'community'
[3]:
# Task 12: Significance of Edge Thickness and Node Size in Network Visualizations
# Objective: Explore how adjusting edge thickness and node size can help emphasize different aspects of a network
import networkx as nx
import matplotlib.pyplot as plt
# Create a graph object
G = nx.Graph()
# Add nodes and edges with different weights (edge weights represent strength of connection)
G.add_edge('Alice', 'Bob', weight=2)
G.add_edge('Alice', 'Charlie', weight=4)
G.add_edge('Bob', 'David', weight=1)
G.add_edge('Charlie', 'David', weight=3)
G.add_edge('Eve', 'Charlie', weight=2)
G.add_edge('Charlie', 'Frank', weight=5)
# Get edge weights for visualization
edge_weights = [G[u][v]['weight'] for u, v in G.edges()]
# Draw the network with varying edge thickness based on weights
plt.figure(figsize=(7, 6))
# Adjust node size based on degree centrality
node_size = [1000 * G.degree(node) for node in G.nodes()]
# Draw the network with edge width based on weight and node size based on degree
nx.draw(G, with_labels=True, node_size=node_size, font_size=14, font_weight='bold', edge_color='gray', width=edge_weights, node_color='skyblue')
plt.title("Network with Edge Thickness and Node Size")
plt.show()
[7]:
# Task 13: Visualizing Hierarchical Data (Tree/Sunburst Chart)
# Objective:Visualize hierarchical data using `plotly`
import pandas as pd
import plotly.express as px
# Sample hierarchical data (product categories and subcategories)
data = {
'Parent': ['Root', 'Root', 'Electronics', 'Electronics', 'Furniture', 'Furniture'],
'Child': ['Electronics', 'Furniture', 'Mobile', 'Laptop', 'Chair', 'Table'],
'Value': [10, 15, 5, 5, 8, 7]
}
df_hierarchy = pd.DataFrame(data)
# Plot sunburst chart
fig = px.sunburst(df_hierarchy, path=['Parent', 'Child'], values='Value')
fig.show()
[3]:
# Day 4: Visualizing Hierarchical Data
# Task 14: Visualizing the Hierarchical Relationship Between Products in a Retail Business
import plotly.express as px
import pandas as pd
# Sample dataset representing a product hierarchy
data = {
"Category": ["Electronics", "Electronics", "Electronics", "Clothing", "Clothing", "Clothing", "Furniture"],
"Subcategory": ["Phones", "Laptops", "Tablets", "Men", "Women", "Kids", "Chairs"],
"Product": ["iPhone", "MacBook", "iPad", "Shirts", "Dresses", "Uniforms", "Office Chair"],
"Sales": [12000, 18000, 15000, 9000, 8000, 5000, 10000]
}
# Create a DataFrame
df = pd.DataFrame(data)
# Create a sunburst chart
fig = px.sunburst(df, path=['Category', 'Subcategory', 'Product'], values='Sales',
title="Product Hierarchy in Retail Business")
fig.update_layout(margin=dict(t=0, l=0, r=0, b=0))
fig.show()
[6]:
# Day 4: Visualizing Hierarchical Data
# Task 15: Visualizing a Decision Tree Hierarchy
# Example Code for Decision Tree Visualization
from sklearn.tree import DecisionTreeClassifier, plot_tree
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
# Load sample data (using Iris dataset as a placeholder for product/customer decision data)
data = load_iris()
X = data.data
y = data.target
# Train a decision tree classifier
clf = DecisionTreeClassifier(max_depth=3, random_state=0)
clf.fit(X, y)
# Plot the decision tree
plt.figure(figsize=(12, 8))
plot_tree(clf, feature_names=data.feature_names, class_names=list(data.target_names), filled=True, rounded=True)
plt.title("Decision Tree for Customer Decision Making")
plt.show()
[ ]:
